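'''
Convert a text file to UTF-8.
The file's content is read using its detected encoding and then written
back to the same path encoded as UTF-8 (the file is overwritten in place).
'''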

'''
获取文本编码格式
'''

from chardet.universaldetector import UniversalDetector
import chardet
def GetEncoding(file):
    """
    Detect the character encoding of a file with chardet.

    The file is opened in binary mode and fed to a ``UniversalDetector`` one
    line at a time; reading stops as soon as the detector reports ``done``.

    :param file: path of the file to inspect (anything ``open`` accepts)
    :return: the detector's result dict, e.g.
        ``{'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}``,
        or ``None`` when the file cannot be opened or read
    """
    try:
        with open(file, 'rb') as f:
            detector = UniversalDetector()
            # Iterate the file object lazily rather than calling readlines(),
            # so a large file is never held in memory all at once.
            for line in f:
                detector.feed(line)
                if detector.done:
                    # The detector has reached its verdict; chardet's
                    # documented usage stops feeding at this point.
                    break
    except OSError as e:
        print(e)
        print("打开文件失败！")
        # Failure is reported on stdout and signalled to the caller as None.
        return None

    # close() finalises detection; the verdict is then in detector.result.
    detector.close()
    return detector.result
   

def File_Encoding_to_UTF(file):
    """
    Re-encode a text file as UTF-8 in place.

    The encoding is detected with ``GetEncoding``, the file is decoded with
    that encoding, and the same path is then overwritten with the text
    encoded as UTF-8. The file is left untouched when the encoding cannot be
    detected or the file cannot be read.

    :param file: path of the text file to convert
    :return: None
    :raises UnicodeDecodeError: if the content cannot be decoded with the
        detected encoding (raised before anything is written)
    """
    r = GetEncoding(file)
    print(f'{r} {type(r)}')

    if r is None:
        # GetEncoding could not open the file and has already printed the
        # error; there is nothing to convert.
        return

    encoding = r['encoding']
    if encoding is None:
        # Passing encoding=None to open() would silently fall back to the
        # locale default and could overwrite the file with mis-decoded text,
        # so skip the file instead of guessing.
        print("无法识别文件编码！")
        return

    try:  # read phase
        # newline='' turns off newline translation so the original line
        # endings survive the round trip unchanged.
        with open(file, 'r', encoding=encoding, newline='') as f:
            content = f.read()
    except OSError as e:
        print(e)
        print("打开文件失败！")
        # Stop here: opening the path for writing below would truncate it
        # even though nothing was read.
        return

    try:  # write phase
        with open(file, 'w', encoding='utf-8', newline='') as f:
            f.write(content)
    except OSError as e:
        print(e)
        print("打开文件失败！")
if __name__ == '__main__':
    import sys

    # Convert every path given on the command line; with no arguments fall
    # back to the sample file the script was originally hard-coded to use.
    for path in sys.argv[1:] or ['bb211222b0.csv']:
        File_Encoding_to_UTF(path)